Gene lists

View results table from gene differential analysis.

differential_analysis_BMP4_CNTF

# load de outputs
res.file<-paste0(workdir, "/analysed_data/differential_analysis_BMP4_CNTF.xlsx")
dge_res_sheets <- readxl::excel_sheets(res.file)[-c(1:4)]
dge_res_sheets <- data.frame(sheets = dge_res_sheets, row.names = dge_res_sheets)
dge_res_sheets_key <- readxl::read_xlsx(res.file, sheet = "Comparison Key")

# replace long sheet name from the keys
dge_res_sheets[dge_res_sheets_key$Key, ] <- dge_res_sheets_key$Comparison

# select comparisons of interest 
dgeBulk <- imap(rownames(dge_res_sheets), ~readxl::read_xlsx(res.file, sheet = .x)) %>%
    setNames(nm = dge_res_sheets$sheets)
# dgeBulk_sel <- dgeBulk[c("BMP4_diff_3w - progenitor|untreated",
#                          "CNTF_diff_3w - progenitor|untreated")]
dgeBulk_sel <- dgeBulk[c(dge_res_sheets_key$Comparison)]
# select relevant comparisons
dgeBulk_sel = dgeBulk_sel[-c(6,8,14)]
# for each comparison, return a table of top hits
deTopList <- c()
for (i in 1:length(dgeBulk_sel)) {
    # select item from list
    de = dgeBulk_sel[[i]]
    name=names(dgeBulk_sel)[i]
    deTop = de %>% arrange(pvalue) %>% slice_head(n=100)
    deTopList <- c(deTopList, list(deTop ))
  }
names(deTopList) <- names(dgeBulk_sel)

for (i in 1:length(deTopList)) {
  
    name = names(deTopList)[[i]]
    cat('\n')
    cat('### ', name, '  \n')
    cat('\n')

  data.frame(deTopList[[i]])
  
  cat('\n')
}

BMP4_diff_2w - progenitor|untreated

BMP4_diff_3w - progenitor|untreated

BMP4_diff_3w - BMP4_diff_2w|untreated

CNTF_diff_2w - progenitor|untreated

CNTF_diff_2w - BMP4_diff_2w|untreated

CNTF_diff_3w - progenitor|untreated

CNTF_diff_3w - BMP4_diff_3w|untreated

CNTF_diff_3w - CNTF_diff_2w|untreated

CNTF_diff_3w - BMP4_diff_3w|treated

treated - untreated|BMP4_diff_3w

treated - untreated|CNTF_diff_3w

# countSig <- function(i) { 
#     # select item from list
#     de = dgeBulk_sel[[i]]
#     name=names(dgeBulk_sel)[i]
#     
#     # count significant up and down DEGs
#     deCount = de %>% filter(sig=="<= 0.05") %>% group_by(class) %>% summarise(count=n())
#     
#     return(deCount)
#   }

View counts of significant differential analysis hits.

differential_analysis_wo2W_BMP4_CNTF_wo2W

# # load differentiated BMP4 and CNTF marker genes
# res.file<-paste0(workdir, "/analysed_data/differential_analysis_wo2W_BMP4_CNTF_wo2W.xlsx")
# dge_res_sheets <- readxl::excel_sheets(res.file)[-c(1:4)]
# dge_res_sheets <- data.frame(sheets = dge_res_sheets, row.names = dge_res_sheets)
# dge_res_sheets_key <- readxl::read_xlsx(res.file, sheet = "Comparison Key")
# 
# # replace long sheet name from the keys
# dge_res_sheets[dge_res_sheets_key$Key, ] <- dge_res_sheets_key$Comparison
# 
# # select comparisons of interest 
# dgeBulk <- imap(rownames(dge_res_sheets), ~readxl::read_xlsx(res.file, sheet = .x)) %>%
#     setNames(nm = dge_res_sheets$sheets)
# # dgeBulk_sel <- dgeBulk[c("BMP4_diff_3w - progenitor|untreated",
# #                          "CNTF_diff_3w - progenitor|untreated")]
# dgeBulk_sel <- dgeBulk[c(dge_res_sheets_key$Comparison)]
# dgeBulk_sel = dgeBulk_sel[-c(7)]

View results table from gene differential analysis.

# countSig <- function(i) { 
#     # select item from list
#     de = dgeBulk_sel[[i]]
#     name=names(dgeBulk_sel)[i]
#     
#     # count significant up and down DEGs
#     deCount = de %>% filter(sig=="<= 0.05") %>% group_by(class) %>% summarise(count=n())
#     
#     return(deCount)
#   }

View counts of significant differential analysis hits.

Volcano plot

Visualising the effect sizes and significance for differential gene expression in differentiated and treated astrocytes.

differential_analysis_BMP4_CNTF

# load de outputs
res.file<-paste0(workdir, "/analysed_data/differential_analysis_BMP4_CNTF.xlsx")
dge_res_sheets <- readxl::excel_sheets(res.file)[-c(1:4)]
dge_res_sheets <- data.frame(sheets = dge_res_sheets, row.names = dge_res_sheets)
dge_res_sheets_key <- readxl::read_xlsx(res.file, sheet = "Comparison Key")

# replace long sheet name from the keys
dge_res_sheets[dge_res_sheets_key$Key, ] <- dge_res_sheets_key$Comparison

# select comparisons of interest 
dgeBulk <- imap(rownames(dge_res_sheets), ~readxl::read_xlsx(res.file, sheet = .x)) %>%
    setNames(nm = dge_res_sheets$sheets)
# dgeBulk_sel <- dgeBulk[c("BMP4_diff_3w - progenitor|untreated",
#                          "CNTF_diff_3w - progenitor|untreated")]
dgeBulk_sel <- dgeBulk[c(dge_res_sheets_key$Comparison)]
# select relevant comparisons
dgeBulk_sel = dgeBulk_sel[-c(6,8,14)]
plotl <- c()
for (i in 1:length(dgeBulk_sel)) {
  
    # select relevant comparison
    de = dgeBulk_sel[[i]]
    name=names(dgeBulk_sel)[i]
    # print(name)
    
    # ensure empty gene symbols are replaced with ensembl IDs
    de[is.na(de$symbol),]$symbol <- de[is.na(de$symbol),]$id

    # add a column of NAs
    de$diffexpressed <- NA
    # if log2Foldchange > 0.0 and pvalue < 0.05, set as "UP" 
    de$diffexpressed[de$log2FoldChange > 0.0 & de$pvalue < 0.05] <- "UP"
    # if log2Foldchange < -0.0 and pvalue < 0.05, set as "DOWN"
    de$diffexpressed[de$log2FoldChange < -0.0 & de$pvalue < 0.05] <- "DOWN"
    de$delabel <- NA
    
    # label all up and downregulated genes
    # de[!(is.na(de$diffexpressed)),]$delabel <-  de[!(is.na(de$diffexpressed)),]$symbol
    
    # label top up and downregulated genes
    # largest -log10(p-value)
    lowN = c("BMP4_diff_3w - BMP4_diff_2w|untreated" , "CNTF_diff_3w - CNTF_diff_2w|untreated")
    highN = dge_res_sheets_key$Comparison[-c(3,10)]
    if (name %in%  lowN) {
        N = 5 # number of genes - will depend on the number of significant gene distribution for the DE comparison
    } else {N = 10}
    
    sym = de[head(order(abs(-log10(de$pvalue)), decreasing = TRUE),N),]$symbol 
    de[head(order(abs(-log10(de$pvalue)), decreasing = TRUE),N),]$delabel <- paste0("italic('",sym,"')" )
    # largest log2(FC), also significant
    sigde <- de %>% dplyr::filter(pvalue < 0.05)
    labelid <- sigde[head(order(abs(sigde$log2FoldChange), decreasing = TRUE),7),]$id
    
    sym =  de[de$id %in% labelid,]$symbol
    de[de$id %in% labelid,]$delabel <- paste0("italic('",sym,"')" )
    
    xlim = max(abs(de$log2FoldChange))+(max(abs(de$log2FoldChange))*0.3)
    ylim =  max(-log10(de$pvalue))+(max(-log10(de$pvalue))*1)
    
    uplabel = de %>% filter(is.na(delabel)==FALSE) %>% dplyr::filter(diffexpressed=="UP") 
    dwlabel = de %>% filter(is.na(delabel)==FALSE) %>% dplyr::filter(diffexpressed=="DOWN") 

    
    vol_plot <- ggplot(data=de, aes(x=log2FoldChange, y=-log10(pvalue), col=diffexpressed, label=delabel)) +
            geom_point(size = 1, alpha = 0.5) + 
            scale_x_continuous(expand = c(0, 0), limits=c(-xlim,xlim)) +
            scale_y_continuous(expand = c(0, 0), limits=c(0,ylim)) +
            theme_classic(base_size=18) +
            
      geom_label_repel(
                    data = dwlabel,
                    aes(label = delabel),
                     size=6,
                     min.segment.length = unit(0.5, 'lines'), 
                     nudge_y = 5.9,
                     # nudge_x = 7.9,
                     xlim = c(NA, -1),
                     max.overlaps=Inf,
                     parse = TRUE,
                    seed = 1) +
      
      geom_label_repel(
                    data = uplabel,
                    aes(label = delabel),
                     size=6,
                     min.segment.length = unit(0.5, 'lines'),
                     nudge_y = 5.9,
                     # nudge_x = 7.9,
                     xlim = c(1, NA),
                     max.overlaps=Inf,
                     parse = TRUE,
                     seed = 1) +
      
            scale_color_manual(values=c("#788feb", "#942025", "black")) +
            # geom_vline(xintercept=c(-0.6, 0.6), col="black") +
            geom_hline(yintercept=-log10(0.05), col="black", linetype = 2) + 
            ggtitle(name) +
            theme(plot.title = element_text(hjust = 0.5, size=18),
                  legend.position = "none")
    
    ggsave(file=paste0(workdir,"/volcano_plot/volcano_",name,".svg"), plot=vol_plot, width=6.5, height=6.5)
    
    plotl <- c(plotl, list(vol_plot))
    
    ## extra: for gene-term plot querying
    # de_genes <- de[!is.na(de$delabel),]$delabel
  }

BMP4_diff_2w - progenitor|untreated

BMP4_diff_3w - progenitor|untreated

BMP4_diff_3w - BMP4_diff_2w|untreated

CNTF_diff_2w - progenitor|untreated

CNTF_diff_2w - BMP4_diff_2w|untreated

CNTF_diff_3w - progenitor|untreated

CNTF_diff_3w - BMP4_diff_3w|untreated

CNTF_diff_3w - CNTF_diff_2w|untreated

CNTF_diff_3w - BMP4_diff_3w|treated

treated - untreated|BMP4_diff_3w

treated - untreated|CNTF_diff_3w

# apply function to one select comparison
# i=3
# lapply(i, plot_volcano)

differential_analysis_wo2W_BMP4_CNTF_wo2W

# # load differentiated BMP4 and CNTF marker genes
# res.file<-paste0(workdir, "/analysed_data/differential_analysis_wo2W_BMP4_CNTF_wo2W.xlsx")
# dge_res_sheets <- readxl::excel_sheets(res.file)[-c(1:4)]
# dge_res_sheets <- data.frame(sheets = dge_res_sheets, row.names = dge_res_sheets)
# dge_res_sheets_key <- readxl::read_xlsx(res.file, sheet = "Comparison Key")
# 
# # replace long sheet name from the keys
# dge_res_sheets[dge_res_sheets_key$Key, ] <- dge_res_sheets_key$Comparison
# 
# # select comparisons of interest 
# dgeBulk <- imap(rownames(dge_res_sheets), ~readxl::read_xlsx(res.file, sheet = .x)) %>%
#     setNames(nm = dge_res_sheets$sheets)
# # dgeBulk_sel <- dgeBulk[c("BMP4_diff_3w - progenitor|untreated",
# #                          "CNTF_diff_3w - progenitor|untreated")]
# dgeBulk_sel <- dgeBulk[c(dge_res_sheets_key$Comparison)]
# dgeBulk_sel = dgeBulk_sel[-c(7)]
# # create function for plot
# plot_volcano <- function(i) {
#   
#     # select relevant comparison
#     de = dgeBulk_sel[[i]]
#     name=names(dgeBulk_sel)[i]
#     # print(name)
#     
#     # ensure empty gene symbols are replaced with ensembl IDs
#     de[is.na(de$symbol),]$symbol <- de[is.na(de$symbol),]$id
# 
#     # add a column of NAs
#     de$diffexpressed <- NA
#     # if log2Foldchange > 0.0 and pvalue < 0.05, set as "UP" 
#     de$diffexpressed[de$log2FoldChange > 0.0 & de$pvalue < 0.05] <- "UP"
#     # if log2Foldchange < -0.0 and pvalue < 0.05, set as "DOWN"
#     de$diffexpressed[de$log2FoldChange < -0.0 & de$pvalue < 0.05] <- "DOWN"
#     de$delabel <- NA
#     
#     # label all up and downregulated genes
#     # de[!(is.na(de$diffexpressed)),]$delabel <-  de[!(is.na(de$diffexpressed)),]$symbol
#     
#     # label top up and downregulated genes
#     # largest -log10(p-value)
#     lowN = c("BMP4_diff_3w - BMP4_diff_2w|untreated" , "CNTF_diff_3w - CNTF_diff_2w|untreated")
#     highN = dge_res_sheets_key$Comparison[-c(3,10)]
#     if (name %in%  lowN) {
#         N = 5 # number of genes - will depend on the number of significant gene distribution for the DE comparison
#     } else {N = 10}
#     
#     sym = de[head(order(abs(-log10(de$pvalue)), decreasing = TRUE),N),]$symbol 
#     de[head(order(abs(-log10(de$pvalue)), decreasing = TRUE),N),]$delabel <- paste0("italic('",sym,"')" )
#     
#     # largest log2(FC), also significant
#     sigde <- de %>% dplyr::filter(pvalue < 0.05)
#     labelid <- sigde[head(order(abs(sigde$log2FoldChange), decreasing = TRUE),7),]$id
#     sym =  de[de$id %in% labelid,]$symbol
#     de[de$id %in% labelid,]$delabel <- paste0("italic('",sym,"')" )
#     
#     xlim = max(abs(de$log2FoldChange))+(max(abs(de$log2FoldChange))*0.3)
#     ylim = max(-log10(de$pvalue))+(max(-log10(de$pvalue))*0.5)
#     ynud = 
#     
#     uplabel = de %>% filter(is.na(delabel)==FALSE) %>% dplyr::filter(diffexpressed=="UP") 
#     dwlabel = de %>% filter(is.na(delabel)==FALSE) %>% dplyr::filter(diffexpressed=="DOWN") 
# 
#     
#     vol_plot <- ggplot(data=de, aes(x=log2FoldChange, y=-log10(pvalue), col=diffexpressed, label=delabel)) +
#             geom_point(size = 1, alpha = 0.5) + 
#             scale_x_continuous(expand = c(0, 0), limits=c(-xlim,xlim)) +
#             scale_y_continuous(expand = c(0, 0), limits=c(0,ylim)) +
#             theme_classic(base_size=18) +
#             
#       geom_label_repel(
#                     data = dwlabel,
#                     aes(label = delabel),
#                      size=6,
#                      min.segment.length = unit(0.5, 'lines'), 
#                      nudge_y = 5.9,
#                      # nudge_x = 7.9,
#                      xlim = c(NA, -1),
#                      max.overlaps=Inf,
#                      parse = TRUE,
#                     seed = 1) +
#       
#       geom_label_repel(
#                     data = uplabel,
#                     aes(label = delabel),
#                      size=6,
#                      min.segment.length = unit(0.5, 'lines'), 
#                      nudge_y = 5.9,
#                      # nudge_x = 7.9,
#                      xlim = c(1, NA),
#                      max.overlaps=Inf,
#                      parse = TRUE,
#                      seed = 1) +
#       
#             scale_color_manual(values=c("#788feb", "#942025", "black")) +
#             # geom_vline(xintercept=c(-0.6, 0.6), col="black") +
#             geom_hline(yintercept=-log10(0.05), col="black", linetype = 2) + 
#             ggtitle(name) +
#             theme(plot.title = element_text(hjust = 0.5, size=18),
#                   legend.position = "none")
#     
#     # ggsave(file=paste0(workdir,"/volcano_plot/volcano_",name,".svg"), plot=vol_plot, width=5.5, height=5.5)
#     
#     return(vol_plot)
#     
#     ## extra: for gene-term plot querying
#     # de_genes <- de[!is.na(de$delabel),]$delabel
#   }
# apply function to one select comparison
# i=3
# lapply(i, plot_volcano)

Venn Diagrams

GO Term Enrichment